# 封装函数
import requests
import re


# 定义函数
def get_html(url: str = 'http://weather.com.cn/weather1d/101010100.shtml',
             timeout: float = 10) -> str:
    """Download a web page and return its body decoded as UTF-8 text.

    Args:
        url: Address of the page to fetch. Defaults to the weather.com.cn
            page this function has always requested.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The response body as a string.

    Raises:
        requests.RequestException: If the connection fails, the request
            times out, or the server answers with a 4xx/5xx status.
    """
    resp = requests.get(url, timeout=timeout)
    # Fail loudly on HTTP errors instead of handing an error page to the parser.
    resp.raise_for_status()
    # Force UTF-8 rather than relying on the encoding guessed from the headers.
    resp.encoding = 'utf-8'
    return resp.text


def parse_html(html_str: str) -> list:
    """Extract weather records from page HTML.

    Four kinds of ``<span>`` elements are collected independently, selected
    by their ``class`` attribute: ``name``, ``weather``, ``wd`` and ``zs``.
    The n-th match of each kind is then combined into the n-th record.

    Args:
        html_str: HTML text to scan.

    Returns:
        A list of ``[name, weather, wd, zs]`` lists of strings. Fields are
        paired purely by position, and the result is as long as the shortest
        of the four match lists; input with no matches yields ``[]``.
    """
    # The name/weather/zs spans only match when their content consists solely
    # of characters in the range U+4E00..U+9FFF (possibly empty).
    cjk_span = r'<span class="%s">([\u4e00-\u9fff]*)</span>'
    city = re.findall(cjk_span % 'name', html_str)
    weather = re.findall(cjk_span % 'weather', html_str)
    # Non-greedy capture: stop at the first closing tag. A greedy ``.*`` would
    # run to the last ``</span>`` on the line and swallow neighbouring markup.
    wd = re.findall(r'<span class="wd">(.*?)</span>', html_str)
    zs = re.findall(cjk_span % 'zs', html_str)

    # zip() stops at the shortest list, so incomplete trailing records are dropped.
    return [list(record) for record in zip(city, weather, wd, zs)]
